Librerie:

set.seed(050701)
library(foreign)
library(rms)
library(arm)
library(ResourceSelection)
library(pROC)
library(PRROC)
library(ROCR)
library(readr)
library(dplyr)
library(tidyr)
library(GGally)
library(heatmaply)
library(plotly)
library(ggplot2)
library(gridExtra)
library(ggpubr)
library(RColorBrewer)
library(scales)
library(ggmap)
library(countrycode)
library(regclass)
library(bestglm)
library(OddsPlotty)

1) PRESENTAZIONE, IMPORTAZIONE, PULIZIA

Importo e pulisco il dataset:

c_data <- read_csv("Speed Dating Data.csv")
Rows: 8378 Columns: 195
── Column specification ───────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr   (4): field, undergra, from, career
dbl (187): iid, id, gender, idg, condtn, wave, round, position, positin1, order, partner, pid, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#eliminiamo i duplicati
#qui abbiamo il dataset completo
c_data <- c_data %>% group_by(iid) %>% filter (! duplicated(iid))

#features che ci interessano:
data=subset(c_data,select=c(attr,sinc,intel,fun,amb,shar,dec,like,samerace,int_corr,prob,race,gender,age,age_o,income,goal,go_out,date,met))
#voglio aggiungere la variabile d_age con la differenza di età tra l'individuo considerato e il partner
data$d_age=abs(data$age-data$age_o) 

#per le features che ci interessano eliminiamo gli na
print(sapply(data,function(x) sum(length(which(is.na(x))))))
    attr     sinc    intel      fun      amb     shar      dec     like samerace int_corr     prob 
       9       14       14       21       40       69        0       12        0        7       15 
    race   gender      age    age_o   income     goal   go_out     date      met    d_age 
       6        0        8        0      270        7        7        8       17        8 
#tolgo la variabile income perchè ho molti na 
data$income=NULL
data$shar=NULL

#elimino gli na: eliminate 60 osservazioni 
data <- na.omit(data)

#decodifico race:
data$race=as.factor(data$race)
levels(data$race)
[1] "1" "2" "3" "4" "6"
data$race=recode(data$race, '1' = 'Black', '2' = 'White','3' = 'Hispanic','4' = 'Asian','6'= 'Other')

#decodifico gender:
data$gender=as.factor(data$gender)
levels(data$gender)
[1] "0" "1"
data$gender=recode(data$gender, '1' = 'Male', '0' = 'Female')

#decodifico goal:
data$goal=as.factor(data$goal)
levels(data$goal)
[1] "1" "2" "3" "4" "5" "6"
data$goal=recode(data$goal, '1'='Fun', '2'='Meet', '3'='Date', '4'= 'Relationship', '5'= 'IdidIt', '6'= 'Other')

#decodifico go_out: (abitudini sociali: quanto escono alla settimana)
data$go_out=as.factor(data$go_out)
levels(data$go_out)
[1] "1" "2" "3" "4" "5" "6" "7"
data$go_out=recode(data$go_out, '1'='Several_pw', '2'='Twice_pw', '3'='Once_pw', '4'= 'Twice_pm', '5'= 'Once_pm', '6'= 'Several_py','7'='Almost_never')

#decodifico date: (abitudini negli appuntamenti: a quanti appuntamenti vanno)
data$date=as.factor(data$date)
levels(data$date)
[1] "1" "2" "3" "4" "5" "6" "7"
data$date=recode(data$date, '1'='Several_pw', '2'='Twice_pw', '3'='Once_pw', '4'= 'Twice_pm', '5'= 'Once_pm', '6'= 'Several_py','7'='Almost_never')

#decodifico samerace
data$samerace=as.factor(data$samerace)
levels(data$samerace)
[1] "0" "1"
data$samerace=recode(data$samerace, '0'='NO', '1'='SI')

2) ANALISI ESPLORATIVA

Chi sono le persone presenti nel campione che stiamo analizzando:

attach(data)
I seguenti oggetti sono mascherati da data (pos = 3):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 4):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc
#maschi e femmine

mf_fig=ggplot(data,aes(gender))+geom_bar(aes(fill=gender))+scale_fill_brewer(palette='Pastel1')+theme(legend.position="none")


#età per genere
age_fig=ggplot(data,aes(age))+geom_bar(aes(fill=gender))+scale_fill_brewer(palette='Pastel1')

#etnia
etnia_fig=ggplot(data,aes(race))+geom_bar(aes(fill=race))+theme(legend.position="none")+scale_fill_brewer(palette='Pastel1')

#ABITUDINI, SCOPI
#go_out
goout_fig=ggplot(data,aes(go_out))+geom_bar(aes(fill=go_out))+theme(legend.position="none")+scale_fill_brewer(palette='Pastel1')

#date
date_fig=ggplot(data,aes(date))+geom_bar(aes(fill=date))+theme(legend.position="none")+scale_fill_brewer(palette='Pastel1')

#goal
goal_fig=ggplot(data,aes(goal))+geom_bar(aes(fill=goal))+theme(legend.position="none")+scale_fill_brewer(palette='Pastel1')

#figura complessiva
ggarrange(mf_fig,age_fig,etnia_fig,goout_fig,date_fig,goal_fig)


detach(data)

Rappresentiamo il legame tra le risposte al questionario e la decisione finale dell’individuo:

attach(data)
I seguenti oggetti sono mascherati da data (pos = 3):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 4):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc
#dec vs attr
y_attr=tapply(dec,attr,mean)
decvsattr_fig=ggplot()+geom_count(aes(attr, dec))+geom_count(aes(sort(unique(attr)), y_attr, colour='red',size=3))+labs(x='attr',y='dec',title='Dec vs Attr')+theme_light()+theme(legend.position="none")

#dec vs sinc 
y_sinc=tapply(dec,sinc,mean)
decvssinc_fig=ggplot()+geom_count(data = data, aes(sinc, dec))+geom_count(aes(sort(unique(sinc)), y_sinc,color='red',size=3))+labs(x='sinc',y='dec',title='Dec vs Sinc')+theme_light()+theme(legend.position="none")

#dec vs intel
y_intel=tapply(dec,intel,mean)
decvsintel_fig=ggplot()+geom_count(data = data, aes(intel, dec))+geom_count(aes(sort(unique(intel)), y_intel,color='red',size=3))+labs(x='intel',y='dec',title='Dec vs Intel')+theme_light()+theme(legend.position="none")

#dec vs fun
y_fun=tapply(dec,fun,mean)
decvsfun_fig=ggplot()+geom_count(data = data, aes(fun, dec))+geom_count(aes(sort(unique(fun)), y_fun,color='red',size=3))+labs(x='fun',y='dec',title='Dec vs Fun')+theme_light()+theme(legend.position="none")

#dec vs amb
y_amb=tapply(dec,amb,mean)
decvsamb_fig=ggplot()+geom_count(data = data, aes(amb, dec))+geom_count(aes(sort(unique(amb)), y_amb,color='red',size=3))+labs(x='amb',y='dec',title='Dec vs Amb')+theme_light()+theme(legend.position="none")

#dec vs like
y_like=tapply(dec,like,mean)
decvslike_fig=ggplot()+geom_count(data = data, aes(like, dec))+geom_count(aes(sort(unique(like)), y_like,color='red',size=3))+labs(x='like',y='dec',title='Dec vs Like')+theme_light()+theme(legend.position="none")

#dec vs prob
y_prob=tapply(dec,prob,mean)
decvsprob_fig=ggplot()+geom_count(data = data, aes(prob, dec))+geom_count(aes(sort(unique(prob)), y_prob,color='red',size=3))+labs(x='prob',y='dec',title='Dec vs Prob')+theme_light()+theme(legend.position="none")

#immagine completa
ggarrange(decvsattr_fig,decvssinc_fig,decvsintel_fig,decvsfun_fig,decvsamb_fig,decvslike_fig,decvsprob_fig,nrow=2)
$`1`

$`2`

$`3`

$`4`

attr(,"class")
[1] "list"      "ggarrange"

detach(data)

Vediamo ora come influenzano la risposta le variabili categoriche gender, race, samerace, date, goal, go_out:

Valutiamo l’impatto della variabile int_cor sulla decisione degli individui:

attach(data)
I seguenti oggetti sono mascherati da train (pos = 41):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc

I seguenti oggetti sono mascherati da train (pos = 42):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc
x=seq(-1,1,0.15)
mid=c((x[2:length(x)]+x[1:(length(x)-1)])/2)
classi=cut(int_corr,breaks=x,include.lowest=TRUE,right=FALSE)

y=tapply(dec,classi,mean)
y
  [-1,-0.85) [-0.85,-0.7) [-0.7,-0.55) [-0.55,-0.4) [-0.4,-0.25) [-0.25,-0.1)  [-0.1,0.05) 
          NA           NA    1.0000000    0.3333333    0.4347826    0.3469388    0.4259259 
  [0.05,0.2)   [0.2,0.35)   [0.35,0.5)   [0.5,0.65)   [0.65,0.8)   [0.8,0.95] 
   0.4637681    0.4270833    0.4315789    0.4137931    0.2500000    0.0000000 
fig=ggplot()+geom_point(aes(int_corr,dec))+geom_point(aes(mid,y,color='red'))
fig
Warning: Removed 2 rows containing missing values (geom_point).

detach(data)

Concludiamo osservando l’andamento della decisione in base all’età dei pratecipanti e alla differenza di età tra partecipanti e partner:

attach(data)
I seguenti oggetti sono mascherati da train (pos = 41):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc

I seguenti oggetti sono mascherati da train (pos = 42):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc
x=seq(min(age),max(age),2)
mid=c((x[2:length(x)]+x[1:(length(x)-1)])/2)
classi=cut(age,breaks=x,include.lowest=TRUE,right=FALSE)

y=tapply(dec,classi,mean)
y
  [18,20)   [20,22)   [22,24)   [24,26)   [26,28)   [28,30)   [30,32)   [32,34)   [34,36) 
0.3333333 0.6153846 0.4361702 0.4200000 0.3431373 0.4545455 0.3333333 0.4782609 0.1818182 
  [36,38)   [38,40)   [40,42)   [42,44)   [44,46)   [46,48)   [48,50)   [50,52)   [52,54] 
1.0000000 0.0000000        NA 0.0000000        NA        NA        NA        NA        NA 
fig=ggplot()+geom_count(aes(age,dec))+geom_point(aes(mid,y,color='red'))
fig
Warning: Removed 6 rows containing missing values (geom_point).

detach(data)
attach(data)
I seguenti oggetti sono mascherati da train (pos = 41):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc

I seguenti oggetti sono mascherati da train (pos = 42):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel,
    like, met, prob, race, samerace, sinc
x=seq(min(d_age),max(d_age),)
mid=c((x[2:length(x)]+x[1:(length(x)-1)])/2)
classi=cut(d_age,breaks=x,include.lowest=TRUE,right=FALSE)

y=tapply(dec,classi,mean)
y
    [0,1)     [1,2)     [2,3)     [3,4)     [4,5)     [5,6)     [6,7)     [7,8)     [8,9) 
0.3750000 0.4404762 0.4864865 0.3888889 0.3548387 0.4042553 0.4146341 0.3571429 0.2000000 
   [9,10)   [10,11)   [11,12)   [12,13)   [13,14)   [14,15)   [15,16)   [16,17)   [17,18) 
0.5454545 0.3333333 0.3333333 0.6666667 1.0000000 0.5000000 0.5000000        NA        NA 
  [18,19)   [19,20)   [20,21)   [21,22] 
0.0000000        NA        NA 1.0000000 
fig=ggplot()+geom_point(aes(mid,y,color='red'))+geom_count(aes(d_age,dec))+labs(x="d_age",y="dec")
fig
Warning: Removed 4 rows containing missing values (geom_point).

detach(data)

Correlazione tra le varie features:

3) COSTRUZIONE DEL MODELLO

Prima di iniziare a costruire il modello dividiamo il dataset in training set e test set per la cross-validazione: lavoreremo sul training dataset

smp_size <- floor(0.8 * nrow(data))

## set the seed to make your partition reproducible
train_ind <- sample(seq_len(nrow(data)), size = smp_size)

train <- data[train_ind, ]
test <- data[-train_ind, ]

Fittiamo un modello di regressione logistica per vedere quali variabili sono significative per predire la decisione di un individuo:

attach(train)
I seguenti oggetti sono mascherati da data (pos = 3):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 4):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 5):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 6):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 7):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 8):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 9):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc

I seguenti oggetti sono mascherati da data (pos = 10):

    age, age_o, amb, attr, d_age, date, dec, fun, gender, go_out, goal, int_corr, intel, like, met, prob, race,
    samerace, sinc
mod0=glm(dec~attr+sinc+intel+fun+amb+like+samerace+int_corr+prob+race+gender+d_age+met+date+goal+go_out,family=binomial(link="logit"),train)
summary(mod0)

Call:
glm(formula = dec ~ attr + sinc + intel + fun + amb + like + 
    samerace + int_corr + prob + race + gender + d_age + met + 
    date + goal + go_out, family = binomial(link = "logit"), 
    data = train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.2022  -0.6226  -0.1615   0.6162   3.0090  

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -6.980569   1.644606  -4.245 2.19e-05 ***
attr                 0.400795   0.119623   3.350 0.000807 ***
sinc                -0.313997   0.131192  -2.393 0.016692 *  
intel                0.039027   0.160043   0.244 0.807342    
fun                  0.382924   0.120014   3.191 0.001419 ** 
amb                 -0.211164   0.114173  -1.850 0.064385 .  
like                 0.555237   0.144658   3.838 0.000124 ***
sameraceSI          -0.441363   0.331220  -1.333 0.182683    
int_corr            -0.428969   0.498617  -0.860 0.389614    
prob                 0.318293   0.080675   3.945 7.97e-05 ***
raceWhite           -0.897190   0.676949  -1.325 0.185057    
raceHispanic         0.212746   0.849669   0.250 0.802288    
raceAsian            0.175952   0.703034   0.250 0.802375    
raceOther           -0.505070   0.810567  -0.623 0.533215    
genderMale           0.632045   0.315055   2.006 0.044841 *  
d_age               -0.005320   0.050064  -0.106 0.915372    
met                  0.064430   0.156108   0.413 0.679806    
dateTwice_pw         1.692848   1.276525   1.326 0.184794    
dateOnce_pw          0.007163   1.114239   0.006 0.994871    
dateTwice_pm         0.385804   1.078448   0.358 0.720538    
dateOnce_pm          0.040669   1.105836   0.037 0.970663    
dateSeveral_py       0.501765   1.094166   0.459 0.646535    
dateAlmost_never    -0.260916   1.109628  -0.235 0.814102    
goalMeet             0.292958   0.342215   0.856 0.391963    
goalDate             0.076765   0.619920   0.124 0.901450    
goalRelationship     2.514666   0.918707   2.737 0.006197 ** 
goalIdidIt           0.431130   0.634799   0.679 0.497037    
goalOther            0.025158   0.614849   0.041 0.967362    
go_outTwice_pw      -0.694158   0.379046  -1.831 0.067052 .  
go_outOnce_pw       -0.109607   0.417724  -0.262 0.793020    
go_outTwice_pm      -0.269284   0.706746  -0.381 0.703188    
go_outOnce_pm       -1.352927   1.046267  -1.293 0.195977    
go_outSeveral_py    -1.198028   1.585128  -0.756 0.449774    
go_outAlmost_never -10.418034 790.291377  -0.013 0.989482    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 517.20  on 383  degrees of freedom
Residual deviance: 310.82  on 350  degrees of freedom
AIC: 378.82

Number of Fisher Scoring iterations: 14
modf=step(mod0,direction="both",scope=~attr+sinc+intel+fun+amb+like+samerace+int_corr+prob+race+date+gender+d_age+met+goal+go_out )
Start:  AIC=378.82
dec ~ attr + sinc + intel + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + met + date + goal + go_out

           Df Deviance    AIC
- go_out    6   315.98 371.98
- date      6   318.61 374.61
- d_age     1   310.83 376.83
- intel     1   310.88 376.88
- met       1   310.99 376.99
- int_corr  1   311.56 377.56
- goal      5   319.84 377.84
- samerace  1   312.61 378.61
<none>          310.82 378.82
- amb       1   314.30 380.30
- gender    1   314.89 380.89
- race      4   320.97 380.97
- sinc      1   316.76 382.76
- fun       1   321.28 387.28
- attr      1   323.22 389.22
- like      1   327.48 393.48
- prob      1   327.52 393.52

Step:  AIC=371.98
dec ~ attr + sinc + intel + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + met + date + goal

           Df Deviance    AIC
- date      6   325.06 369.06
- intel     1   315.98 369.98
- d_age     1   315.99 369.99
- met       1   316.03 370.03
- goal      5   324.51 370.51
- int_corr  1   316.72 370.72
<none>          315.98 371.98
- race      4   324.44 372.44
- samerace  1   318.53 372.53
- gender    1   318.82 372.82
- amb       1   319.35 373.35
- sinc      1   322.49 376.49
+ go_out    6   310.82 378.82
- fun       1   325.87 379.87
- attr      1   329.11 383.11
- prob      1   331.15 385.15
- like      1   333.92 387.92

Step:  AIC=369.06
dec ~ attr + sinc + intel + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + met + goal

           Df Deviance    AIC
- met       1   325.06 367.06
- intel     1   325.07 367.07
- d_age     1   325.15 367.15
- int_corr  1   325.50 367.50
- goal      5   334.13 368.13
- race      4   332.76 368.76
<none>          325.06 369.06
- samerace  1   327.31 369.31
- gender    1   328.34 370.34
- amb       1   328.52 370.52
+ date      6   315.98 371.98
- sinc      1   332.28 374.28
+ go_out    6   318.61 374.61
- fun       1   335.22 377.22
- attr      1   339.13 381.13
- prob      1   343.20 385.20
- like      1   344.64 386.64

Step:  AIC=367.06
dec ~ attr + sinc + intel + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + goal

           Df Deviance    AIC
- intel     1   325.07 365.07
- d_age     1   325.15 365.15
- int_corr  1   325.50 365.50
- goal      5   334.14 366.14
- race      4   332.79 366.79
<none>          325.06 367.06
- samerace  1   327.32 367.32
- gender    1   328.35 368.35
- amb       1   328.53 368.53
+ met       1   325.06 369.06
+ date      6   316.03 370.03
- sinc      1   332.33 372.33
+ go_out    6   318.63 372.63
- fun       1   335.35 375.35
- attr      1   339.14 379.14
- prob      1   343.20 383.20
- like      1   344.64 384.64

Step:  AIC=365.07
dec ~ attr + sinc + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + goal

           Df Deviance    AIC
- d_age     1   325.16 363.16
- int_corr  1   325.52 363.52
- goal      5   334.18 364.18
- race      4   332.93 364.93
<none>          325.07 365.07
- samerace  1   327.37 365.37
- gender    1   328.39 366.39
+ intel     1   325.06 367.06
+ met       1   325.07 367.07
- amb       1   329.16 367.16
+ date      6   316.03 368.03
+ go_out    6   318.64 370.64
- sinc      1   334.13 372.13
- fun       1   335.51 373.51
- attr      1   339.16 377.16
- prob      1   343.70 381.70
- like      1   345.03 383.03

Step:  AIC=363.16
dec ~ attr + sinc + fun + amb + like + samerace + int_corr + 
    prob + race + gender + goal

           Df Deviance    AIC
- int_corr  1   325.65 361.65
- goal      5   334.21 362.21
- race      4   333.03 363.03
<none>          325.16 363.16
- samerace  1   327.54 363.54
- gender    1   328.40 364.40
+ d_age     1   325.07 365.07
+ intel     1   325.15 365.15
+ met       1   325.15 365.15
- amb       1   329.39 365.39
+ date      6   316.04 366.04
+ go_out    6   318.66 368.66
- sinc      1   334.13 370.13
- fun       1   335.65 371.65
- attr      1   339.18 375.18
- prob      1   343.71 379.71
- like      1   345.50 381.50

Step:  AIC=361.65
dec ~ attr + sinc + fun + amb + like + samerace + prob + race + 
    gender + goal

           Df Deviance    AIC
- goal      5   334.80 360.80
<none>          325.65 361.65
- race      4   333.76 361.76
- samerace  1   327.83 361.83
- gender    1   329.04 363.04
+ int_corr  1   325.16 363.16
+ d_age     1   325.52 363.52
+ intel     1   325.63 363.63
+ met       1   325.65 363.65
- amb       1   330.19 364.19
+ date      6   316.83 364.83
+ go_out    6   319.26 367.26
- sinc      1   334.48 368.48
- fun       1   336.50 370.50
- attr      1   340.13 374.13
- prob      1   343.97 377.97
- like      1   345.82 379.82

Step:  AIC=360.8
dec ~ attr + sinc + fun + amb + like + samerace + prob + race + 
    gender

           Df Deviance    AIC
- race      4   341.62 359.62
<none>          334.80 360.80
- samerace  1   337.17 361.17
+ goal      5   325.65 361.65
- gender    1   338.03 362.03
+ int_corr  1   334.21 362.21
+ intel     1   334.73 362.73
+ d_age     1   334.74 362.74
+ met       1   334.80 362.80
- amb       1   339.03 363.03
+ date      6   325.41 363.41
+ go_out    6   328.66 366.66
- fun       1   343.75 367.75
- sinc      1   344.57 368.57
- attr      1   350.46 374.46
- like      1   354.09 378.09
- prob      1   355.28 379.28

Step:  AIC=359.62
dec ~ attr + sinc + fun + amb + like + samerace + prob + gender

           Df Deviance    AIC
<none>          341.62 359.62
- gender    1   343.92 359.92
+ int_corr  1   340.80 360.80
+ race      4   334.80 360.80
+ intel     1   341.35 361.35
+ d_age     1   341.56 361.56
+ met       1   341.58 361.58
+ goal      5   333.76 361.76
- samerace  1   345.91 361.91
- amb       1   346.01 362.01
+ date      6   332.96 362.96
- fun       1   350.17 366.17
+ go_out    6   337.13 367.13
- sinc      1   352.04 368.04
- attr      1   357.66 373.66
- like      1   360.86 376.86
- prob      1   361.32 377.32
summary(modf)

Call:
glm(formula = dec ~ attr + sinc + fun + amb + like + samerace + 
    prob + gender, family = binomial(link = "logit"), data = train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0832  -0.7499  -0.2127   0.7489   2.7988  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -6.13455    0.89677  -6.841 7.88e-12 ***
attr         0.41123    0.10823   3.800 0.000145 ***
sinc        -0.35148    0.11119  -3.161 0.001572 ** 
fun          0.30576    0.10636   2.875 0.004044 ** 
amb         -0.20185    0.09788  -2.062 0.039191 *  
like         0.55065    0.13248   4.157 3.23e-05 ***
sameraceSI  -0.58016    0.28278  -2.052 0.040203 *  
prob         0.31221    0.07280   4.288 1.80e-05 ***
genderMale   0.41572    0.27422   1.516 0.129521    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 517.20  on 383  degrees of freedom
Residual deviance: 341.62  on 375  degrees of freedom
AIC: 359.62

Number of Fisher Scoring iterations: 5
vif(modf)
      attr       sinc        fun        amb       like sameraceSI       prob genderMale 
  1.522973   1.645558   1.602007   1.553870   1.716699   1.051078   1.095150   1.052401 
detach(train)

Costruiamo il classificatore:

soglia=0.5
valori_reali=train$dec    
valori_predetti=as.numeric(modf$fitted.values>soglia)

tab=table(valori_reali,valori_predetti)
tab
            valori_predetti
valori_reali   0   1
           0 172  48
           1  52 112
accuratezza = sum(diag(tab))/sum(tab)
accuratezza
[1] 0.7395833
specificita = tab[1,1]/(tab[1,1]+tab[1,2])
specificita
[1] 0.7818182
FPR=1-specificita 


sensitivita = tab[2,2]/(tab[2,1]+tab[2,2])
sensitivita
[1] 0.6829268
fit=modf$fitted
PRROC_obj <- roc.curve(scores.class0 = fit, weights.class0=as.numeric(paste(train$dec)),
                       curve=TRUE)


plot(PRROC_obj)
points(FPR,sensitivita,pch=4,lwd=3,cex=1.5,col='blue')


#trovare la soglia ottima: sembrerebbe essere 0.317
mycurve = roc(train$dec,modf$fitted.values)
Setting levels: control = 0, case = 1
Setting direction: controls < cases
plot(mycurve,print.thres=TRUE)


#ricalcolo le tabelle di misclassificazione
soglia=0.369
valori_reali=train$dec    
valori_predetti=as.numeric(modf$fitted.values>soglia)

tab=table(valori_reali,valori_predetti)
tab
            valori_predetti
valori_reali   0   1
           0 156  64
           1  31 133
accuratezza = sum(diag(tab))/sum(tab)
accuratezza
[1] 0.7526042
specificita = tab[1,1]/(tab[1,1]+tab[1,2])
specificita
[1] 0.7090909
FPR=1-specificita 


sensitivita = tab[2,2]/(tab[2,1]+tab[2,2])
sensitivita
[1] 0.8109756

4) DIAGNOSTICA E GOF

Check collinearità:

vif(modf)
      attr       sinc        fun        amb       like sameraceSI       prob      intel 
  1.443596   1.839755   1.759013   1.733104   1.782484   1.039053   1.111862   2.165017 

Verifichiamo che il modello ridotto non sia meno informativo del modello completo iniziale con un test anova:

anova(modf,mod0,test="Chisq")
Analysis of Deviance Table

Model 1: dec ~ attr + sinc + fun + amb + like + samerace + prob + intel
Model 2: dec ~ attr + sinc + intel + fun + amb + like + samerace + int_corr + 
    prob + race + gender + d_age + met + date + goal + go_out
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1       375      370.0                     
2       350      339.2 25   30.805   0.1956

Test di Hosmer e Lemeshow per valutare GOF modello:

hoslem.test(modf$y,fitted(modf),g=13)

    Hosmer and Lemeshow goodness of fit (GOF) test

data:  modf$y, fitted(modf)
X-squared = 11.701, df = 11, p-value = 0.3865
dim(model.matrix(modf))
[1] 384   9

Odds ratio per interpretare i coefficienti:

library(OddsPlotty)
plotty=odds_plot(modf)
In attesa che venga eseguita la profilazione...
plotty$odds_plot

Testiamo il modello fittato sul test set:

predTest <- predict(modf, test, type="response")


soglia=0.317  # threshold for categorizing predicted probabilities
predFac <- cut(predTest, breaks=c(-Inf, soglia, Inf), labels=c('0', '1'))

Tab    <- table(test$dec, predFac, dnn=c("actual", "predicted"))
Tab
      predicted
actual  0  1
     0 47 14
     1  3 32
accuratezza = sum(diag(Tab))/sum(Tab)
accuratezza
[1] 0.8229167
specificita = Tab[1,1]/(Tab[1,1]+Tab[1,2])
specificita
[1] 0.7704918
FPR=1-specificita 


sensitivita = Tab[2,2]/(Tab[2,1]+Tab[2,2])
sensitivita
[1] 0.9142857
fit=modf$fitted
PRROC_obj <- roc.curve(scores.class0 = fit, weights.class0=as.numeric(paste(train$dec)),
                       curve=TRUE)


plot(PRROC_obj)

LS0tDQp0aXRsZTogIlByb2dldHRvIFI6IFNwZWVkIGRhdGluZyBkYXRhc2V0Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KTGlicmVyaWU6DQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMDUwNzAxKQ0KbGlicmFyeShmb3JlaWduKQ0KbGlicmFyeShybXMpDQpsaWJyYXJ5KGFybSkNCmxpYnJhcnkoUmVzb3VyY2VTZWxlY3Rpb24pDQpsaWJyYXJ5KHBST0MpDQpsaWJyYXJ5KFBSUk9DKQ0KbGlicmFyeShST0NSKQ0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeShHR2FsbHkpDQpsaWJyYXJ5KGhlYXRtYXBseSkNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShncmlkRXh0cmEpDQpsaWJyYXJ5KGdncHVicikNCmxpYnJhcnkoUkNvbG9yQnJld2VyKQ0KbGlicmFyeShzY2FsZXMpDQpsaWJyYXJ5KGdnbWFwKQ0KbGlicmFyeShjb3VudHJ5Y29kZSkNCmxpYnJhcnkocmVnY2xhc3MpDQpsaWJyYXJ5KGJlc3RnbG0pDQpsaWJyYXJ5KE9kZHNQbG90dHkpDQpgYGANCg0KIyMgMSkgUFJFU0VOVEFaSU9ORSwgSU1QT1JUQVpJT05FLCBQVUxJWklBDQoNCkltcG9ydG8gZSBwdWxpc2NvIGlsIGRhdGFzZXQ6DQoNCmBgYHtyfQ0KY19kYXRhIDwtIHJlYWRfY3N2KCJTcGVlZCBEYXRpbmcgRGF0YS5jc3YiKQ0KI2VsaW1pbmlhbW8gaSBkdXBsaWNhdGkNCiNxdWkgYWJiaWFtbyBpbCBkYXRhc2V0IGNvbXBsZXRvDQpjX2RhdGEgPC0gY19kYXRhICU+JSBncm91cF9ieShpaWQpICU+JSBmaWx0ZXIgKCEgZHVwbGljYXRlZChpaWQpKQ0KDQojZmVhdHVyZXMgY2hlIGNpIGludGVyZXNzYW5vOg0KZGF0YT1zdWJzZXQoY19kYXRhLHNlbGVjdD1jKGF0dHIsc2luYyxpbnRlbCxmdW4sYW1iLHNoYXIsZGVjLGxpa2Usc2FtZXJhY2UsaW50X2NvcnIscHJvYixyYWNlLGdlbmRlcixhZ2UsYWdlX28saW5jb21lLGdvYWwsZ29fb3V0LGRhdGUsbWV0KSkNCiN2b2dsaW8gYWdnaXVuZ2VyZSBsYSB2YXJpYWJpbGUgZF9hZ2UgY29uIGxhIGRpZmZlcmVuemEgZGkgZXTDoCB0cmEgbCdpbmRpdmlkdW8gY29uc2lkZXJhdG8gZSBpbCBwYXJ0bmVyDQpkYXRhJGRfYWdlPWFicyhkYXRhJGFnZS1kYXRhJGFnZV9vKSANCg0KI3BlciBsZSBmZWF0dXJlcyBjaGUgY2kgaW50ZXJlc3Nhbm8gZWxpbWluaWFtbyBnbGkgbmENCnByaW50KHNhcHBseShkYXRhLGZ1bmN0aW9uKHgpIHN1bShsZW5ndGgod2hpY2goaXMubmEoeCkpKSkpKQ0KDQojdG9sZ28gbGEgdmFyaWFiaWxlIGluY29tZSBwZXJjaMOoIGhvIG1vbHRpIG5hIA0KZGF0YSRpbmNvbWU9TlVMTA0KZGF0YSRzaGFyPU5VTEwNCg0KI2VsaW1pbm8gZ2xpIG5hOiBlbGltaW5hdGUgNjAgb3NzZXJ2YXppb25pIA0KZGF0YSA8LSBuYS5vbWl0KGRhdGEpDQoNCiNkZWNvZGlmaWNvIHJhY2U6DQpkYXRhJHJhY2U9YXMuZmFjdG9yKGRhdGEkcmFjZSkNCmxldmVscyhkYXRhJHJhY2UpDQpkYXRhJHJhY2U9cmVjb2RlKGRhdGEkcmFjZSwgJzEnID0gJ0JsYWNrJywgJzInID0gJ1doaXRlJywnMycgPSAnSGlzcGFuaWMnLCc0JyA9ICdBc2lhbicsJzYnPSAnT3RoZXInKQ0KDQojZGVjb2RpZmljbyBnZW5kZXI6DQpkYXRhJGdlbmRlcj1hcy5mYWN0b3IoZGF0YSRnZW5kZXIpDQpsZXZlbHMoZGF0YSRnZW5kZXIpDQpkYXRhJGdlbmRlcj1yZWNvZGUoZGF0YSRnZW5kZXIsICcxJyA9ICdNYWxlJywgJzAnID0gJ0ZlbWFsZScpDQoNCiNkZWNvZGlmaWNvIGdvYWw6DQpkYXRhJGdvYWw9YXMuZmFjdG9yKGRhdGEkZ29hbCkNCmxldmVscyhkYXRhJGdvYWwpDQpkYXRhJGdvYWw9cmVjb2RlKGRhdGEkZ29hbCwgJzEnPSdGdW4nLCAnMic9J01lZXQnLCAnMyc9J0RhdGUnLCAnNCc9ICdSZWxhdGlvbnNoaXAnLCAnNSc9ICdJZGlkSXQnLCAnNic9ICdPdGhlcicpDQoNCiNkZWNvZGlmaWNvIGdvX291dDogKGFiaXR1ZGluaSBzb2NpYWxpOiBxdWFudG8gZXNjb25vIGFsbGEgc2V0dGltYW5hKQ0KZGF0YSRnb19vdXQ9YXMuZmFjdG9yKGRhdGEkZ29fb3V0KQ0KbGV2ZWxzKGRhdGEkZ29fb3V0KQ0KZGF0YSRnb19vdXQ9cmVjb2RlKGRhdGEkZ29fb3V0LCAnMSc9J1NldmVyYWxfcHcnLCAnMic9J1R3aWNlX3B3JywgJzMnPSdPbmNlX3B3JywgJzQnPSAnVHdpY2VfcG0nLCAnNSc9ICdPbmNlX3BtJywgJzYnPSAnU2V2ZXJhbF9weScsJzcnPSdBbG1vc3RfbmV2ZXInKQ0KDQojZGVjb2RpZmljbyBkYXRlOiAoYWJpdHVkaW5pIG5lZ2xpIGFwcHVudGFtZW50aTogYSBxdWFudGkgYXBwdW50YW1lbnRpIHZhbm5vKQ0KZGF0YSRkYXRlPWFzLmZhY3RvcihkYXRhJGRhdGUpDQpsZXZlbHMoZGF0YSRkYXRlKQ0KZGF0YSRkYXRlPXJlY29kZShkYXRhJGRhdGUsICcxJz0nU2V2ZXJhbF9wdycsICcyJz0nVHdpY2VfcHcnLCAnMyc9J09uY2VfcHcnLCAnNCc9ICdUd2ljZV9wbScsICc1Jz0gJ09uY2VfcG0nLCAnNic9ICdTZXZlcmFsX3B5JywnNyc9J0FsbW9zdF9uZXZlcicpDQoNCiNkZWNvZGlmaWNvIHNhbWVyYWNlDQpkYXRhJHNhbWVyYWNlPWFzLmZhY3RvcihkYXRhJHNhbWVyYWNlKQ0KbGV2ZWxzKGRhdGEkc2FtZXJhY2UpDQpkYXRhJHNhbWVyYWNlPXJlY29kZShkYXRhJHNhbWVyYWNlLCAnMCc9J05PJywgJzEnPSdTSScpDQoNCmBgYA0KDQojIyMgMikgQU5BTElTSSBFU1BMT1JBVElWQQ0KDQpDaGkgc29ubyBsZSBwZXJzb25lIHByZXNlbnRpIG5lbCBjYW1waW9uZSBjaGUgc3RpYW1vIGFuYWxpenphbmRvOg0KDQpgYGB7cn0NCmF0dGFjaChkYXRhKQ0KDQojbWFzY2hpIGUgZmVtbWluZQ0KDQptZl9maWc9Z2dwbG90KGRhdGEsYWVzKGdlbmRlcikpK2dlb21fYmFyKGFlcyhmaWxsPWdlbmRlcikpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKSt0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKQ0KDQoNCiNldMOgIHBlciBnZW5lcmUNCmFnZV9maWc9Z2dwbG90KGRhdGEsYWVzKGFnZSkpK2dlb21fYmFyKGFlcyhmaWxsPWdlbmRlcikpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKQ0KDQojZXRuaWENCmV0bmlhX2ZpZz1nZ3Bsb3QoZGF0YSxhZXMocmFjZSkpK2dlb21fYmFyKGFlcyhmaWxsPXJhY2UpKSt0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKStzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSdQYXN0ZWwxJykNCg0KI0FCSVRVRElOSSwgU0NPUEkNCiNnb19vdXQNCmdvb3V0X2ZpZz1nZ3Bsb3QoZGF0YSxhZXMoZ29fb3V0KSkrZ2VvbV9iYXIoYWVzKGZpbGw9Z29fb3V0KSkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0nUGFzdGVsMScpDQoNCiNkYXRlDQpkYXRlX2ZpZz1nZ3Bsb3QoZGF0YSxhZXMoZGF0ZSkpK2dlb21fYmFyKGFlcyhmaWxsPWRhdGUpKSt0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKStzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSdQYXN0ZWwxJykNCg0KI2dvYWwNCmdvYWxfZmlnPWdncGxvdChkYXRhLGFlcyhnb2FsKSkrZ2VvbV9iYXIoYWVzKGZpbGw9Z29hbCkpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKQ0KDQojZmlndXJhIGNvbXBsZXNzaXZhDQpnZ2FycmFuZ2UobWZfZmlnLGFnZV9maWcsZXRuaWFfZmlnLGdvb3V0X2ZpZyxkYXRlX2ZpZyxnb2FsX2ZpZykNCg0KZGV0YWNoKGRhdGEpDQpgYGANCg0KUmFwcHJlc2VudGlhbW8gaWwgbGVnYW1lIHRyYSBsZSByaXNwb3N0ZSBhbCBxdWVzdGlvbmFyaW8gZSBsYSBkZWNpc2lvbmUgZmluYWxlIGRlbGwnaW5kaXZpZHVvOg0KDQpgYGB7cn0NCmF0dGFjaChkYXRhKQ0KI2RlYyB2cyBhdHRyDQp5X2F0dHI9dGFwcGx5KGRlYyxhdHRyLG1lYW4pDQpkZWN2c2F0dHJfZmlnPWdncGxvdCgpK2dlb21fY291bnQoYWVzKGF0dHIsIGRlYykpK2dlb21fY291bnQoYWVzKHNvcnQodW5pcXVlKGF0dHIpKSwgeV9hdHRyLCBjb2xvdXI9J3JlZCcsc2l6ZT0zKSkrbGFicyh4PSdhdHRyJyx5PSdkZWMnLHRpdGxlPSdEZWMgdnMgQXR0cicpK3RoZW1lX2xpZ2h0KCkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikNCg0KI2RlYyB2cyBzaW5jIA0KeV9zaW5jPXRhcHBseShkZWMsc2luYyxtZWFuKQ0KZGVjdnNzaW5jX2ZpZz1nZ3Bsb3QoKStnZW9tX2NvdW50KGRhdGEgPSBkYXRhLCBhZXMoc2luYywgZGVjKSkrZ2VvbV9jb3VudChhZXMoc29ydCh1bmlxdWUoc2luYykpLCB5X3NpbmMsY29sb3I9J3JlZCcsc2l6ZT0zKSkrbGFicyh4PSdzaW5jJyx5PSdkZWMnLHRpdGxlPSdEZWMgdnMgU2luYycpK3RoZW1lX2xpZ2h0KCkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikNCg0KI2RlYyB2cyBpbnRlbA0KeV9pbnRlbD10YXBwbHkoZGVjLGludGVsLG1lYW4pDQpkZWN2c2ludGVsX2ZpZz1nZ3Bsb3QoKStnZW9tX2NvdW50KGRhdGEgPSBkYXRhLCBhZXMoaW50ZWwsIGRlYykpK2dlb21fY291bnQoYWVzKHNvcnQodW5pcXVlKGludGVsKSksIHlfaW50ZWwsY29sb3I9J3JlZCcsc2l6ZT0zKSkrbGFicyh4PSdpbnRlbCcseT0nZGVjJyx0aXRsZT0nRGVjIHZzIEludGVsJykrdGhlbWVfbGlnaHQoKSt0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKQ0KDQojZGVjIHZzIGZ1bg0KeV9mdW49dGFwcGx5KGRlYyxmdW4sbWVhbikNCmRlY3ZzZnVuX2ZpZz1nZ3Bsb3QoKStnZW9tX2NvdW50KGRhdGEgPSBkYXRhLCBhZXMoZnVuLCBkZWMpKStnZW9tX2NvdW50KGFlcyhzb3J0KHVuaXF1ZShmdW4pKSwgeV9mdW4sY29sb3I9J3JlZCcsc2l6ZT0zKSkrbGFicyh4PSdmdW4nLHk9J2RlYycsdGl0bGU9J0RlYyB2cyBGdW4nKSt0aGVtZV9saWdodCgpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpDQoNCiNkZWMgdnMgYW1iDQp5X2FtYj10YXBwbHkoZGVjLGFtYixtZWFuKQ0KZGVjdnNhbWJfZmlnPWdncGxvdCgpK2dlb21fY291bnQoZGF0YSA9IGRhdGEsIGFlcyhhbWIsIGRlYykpK2dlb21fY291bnQoYWVzKHNvcnQodW5pcXVlKGFtYikpLCB5X2FtYixjb2xvcj0ncmVkJyxzaXplPTMpKStsYWJzKHg9J2FtYicseT0nZGVjJyx0aXRsZT0nRGVjIHZzIEFtYicpK3RoZW1lX2xpZ2h0KCkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikNCg0KI2RlYyB2cyBsaWtlDQp5X2xpa2U9dGFwcGx5KGRlYyxsaWtlLG1lYW4pDQpkZWN2c2xpa2VfZmlnPWdncGxvdCgpK2dlb21fY291bnQoZGF0YSA9IGRhdGEsIGFlcyhsaWtlLCBkZWMpKStnZW9tX2NvdW50KGFlcyhzb3J0KHVuaXF1ZShsaWtlKSksIHlfbGlrZSxjb2xvcj0ncmVkJyxzaXplPTMpKStsYWJzKHg9J2xpa2UnLHk9J2RlYycsdGl0bGU9J0RlYyB2cyBMaWtlJykrdGhlbWVfbGlnaHQoKSt0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKQ0KDQojZGVjIHZzIHByb2INCnlfcHJvYj10YXBwbHkoZGVjLHByb2IsbWVhbikNCmRlY3ZzcHJvYl9maWc9Z2dwbG90KCkrZ2VvbV9jb3VudChkYXRhID0gZGF0YSwgYWVzKHByb2IsIGRlYykpK2dlb21fY291bnQoYWVzKHNvcnQodW5pcXVlKHByb2IpKSwgeV9wcm9iLGNvbG9yPSdyZWQnLHNpemU9MykpK2xhYnMoeD0ncHJvYicseT0nZGVjJyx0aXRsZT0nRGVjIHZzIFByb2InKSt0aGVtZV9saWdodCgpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpDQoNCiNpbW1hZ2luZSBjb21wbGV0YQ0KZ2dhcnJhbmdlKGRlY3ZzYXR0cl9maWcsZGVjdnNzaW5jX2ZpZyxkZWN2c2ludGVsX2ZpZyxkZWN2c2Z1bl9maWcsZGVjdnNhbWJfZmlnLGRlY3ZzbGlrZV9maWcsZGVjdnNwcm9iX2ZpZyxucm93PTIpDQoNCmRldGFjaChkYXRhKQ0KYGBgDQoNClZlZGlhbW8gb3JhIGNvbWUgaW5mbHVlbnphbm8gbGEgcmlzcG9zdGEgbGUgdmFyaWFiaWxpIGNhdGVnb3JpY2hlIGdlbmRlciwgcmFjZSwgc2FtZXJhY2UsIGRhdGUsIGdvYWwsIGdvX291dDoNCg0KYGBge3J9DQojcmF0ZSBvZiBwb3NpdGl2ZSBkZWMNCmF0dGFjaChkYXRhKQ0KI2dlbmRlcg0KZ2VuZGVyX3JhdGU9ZGF0YSAlPiUgDQogIGdyb3VwX2J5KGdlbmRlcikgJT4lDQogIHN1bW1hcmlzZShhY3Jvc3MoZGVjLCBtZWFuLCBuYS5ybSA9IFRSVUUpKQ0KDQpnZW5kZXJyYXRlX2ZpZz1nZ3Bsb3QoKStnZW9tX2NvbChhZXMoZ2VuZGVyX3JhdGUkZ2VuZGVyLHBlcmNlbnQoZ2VuZGVyX3JhdGUkZGVjKSxmaWxsPWdlbmRlcl9yYXRlJGdlbmRlcikpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKStsYWJzKHg9ImdlbmRlciIseT0iUEZSIikNCg0KI2V0bmlhDQpldG5pYV9yYXRlPWRhdGEgJT4lIA0KICBncm91cF9ieShyYWNlKSAlPiUNCiAgc3VtbWFyaXNlKGFjcm9zcyhkZWMsIG1lYW4sIG5hLnJtID0gVFJVRSkpDQoNCmV0bmlhcmF0ZV9maWc9Z2dwbG90KCkrZ2VvbV9jb2woYWVzKGV0bmlhX3JhdGUkcmFjZSxwZXJjZW50KGV0bmlhX3JhdGUkZGVjKSxmaWxsPWV0bmlhX3JhdGUkcmFjZSkpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKStsYWJzKHg9ImV0bmlhIix5PSJQRlIiKQ0KDQojZGF0ZQ0KZGF0ZV9yYXRlPWRhdGEgJT4lIA0KICBncm91cF9ieShkYXRlKSAlPiUNCiAgc3VtbWFyaXNlKGFjcm9zcyhkZWMsIG1lYW4sIG5hLnJtID0gVFJVRSkpDQoNCmRhdGVyYXRlX2ZpZz1nZ3Bsb3QoKStnZW9tX2NvbChhZXMoZGF0ZV9yYXRlJGRhdGUscGVyY2VudChkYXRlX3JhdGUkZGVjKSxmaWxsPWRhdGVfcmF0ZSRkYXRlKSkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0nUGFzdGVsMScpK2xhYnMoeD0iZGF0ZSIseT0iUEZSIikNCg0KI2dvYWwNCmdvYWxfcmF0ZT1kYXRhICU+JSANCiAgZ3JvdXBfYnkoZ29hbCkgJT4lDQogIHN1bW1hcmlzZShhY3Jvc3MoZGVjLCBtZWFuLCBuYS5ybSA9IFRSVUUpKQ0KDQpnb2FscmF0ZV9maWc9Z2dwbG90KCkrZ2VvbV9jb2woYWVzKGdvYWxfcmF0ZSRnb2FsLHBlcmNlbnQoZ29hbF9yYXRlJGRlYyksZmlsbD1nb2FsX3JhdGUkZ29hbCkpK3RoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpK3NjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9J1Bhc3RlbDEnKStsYWJzKHg9ImdvYWwiLHk9IlBGUiIpDQoNCiNnb19vdXQNCmdvb3V0X3JhdGU9ZGF0YSAlPiUgDQogIGdyb3VwX2J5KGdvX291dCkgJT4lDQogIHN1bW1hcmlzZShhY3Jvc3MoZGVjLCBtZWFuLCBuYS5ybSA9IFRSVUUpKQ0KDQpnb291dHJhdGVfZmlnPWdncGxvdCgpK2dlb21fY29sKGFlcyhnb291dF9yYXRlJGdvX291dCxwZXJjZW50KGdvb3V0X3JhdGUkZGVjKSxmaWxsPWdvb3V0X3JhdGUkZ29fb3V0KSkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0nUGFzdGVsMScpK2xhYnMoeD0iZ29fb3V0Iix5PSJQRlIiKQ0KDQojc2FtZXJhY2UNCnNhbWVyYWNlX3JhdGU9ZGF0YSAlPiUgDQogIGdyb3VwX2J5KHNhbWVyYWNlKSAlPiUNCiAgc3VtbWFyaXNlKGFjcm9zcyhkZWMsIG1lYW4sIG5hLnJtID0gVFJVRSkpDQoNCnNhbWVyYWNlcmF0ZV9maWc9Z2dwbG90KCkrZ2VvbV9jb2woYWVzKHNhbWVyYWNlX3JhdGUkc2FtZXJhY2UscGVyY2VudChzYW1lcmFjZV9yYXRlJGRlYyksZmlsbD1zYW1lcmFjZV9yYXRlJHNhbWVyYWNlKSkrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0nUGFzdGVsMScpK2xhYnMoeD0ic2FtZXJhY2UiLHk9IlBGUiIpDQoNCg0KI2ZpZ3VyYSBjb21wbGVzc2l2YSANCmdnYXJyYW5nZShnZW5kZXJyYXRlX2ZpZyxldG5pYXJhdGVfZmlnLGRhdGVyYXRlX2ZpZyxnb2FscmF0ZV9maWcsZ29vdXRyYXRlX2ZpZyxzYW1lcmFjZXJhdGVfZmlnKQ0KDQpkZXRhY2goZGF0YSkNCmBgYA0KDQpWYWx1dGlhbW8gbCdpbXBhdHRvIGRlbGxhIHZhcmlhYmlsZSBpbnRfY29yIHN1bGxhIGRlY2lzaW9uZSBkZWdsaSBpbmRpdmlkdWk6DQoNCmBgYHtyfQ0KYXR0YWNoKGRhdGEpDQoNCng9c2VxKC0xLDEsMC4xNSkNCm1pZD1jKCh4WzI6bGVuZ3RoKHgpXSt4WzE6KGxlbmd0aCh4KS0xKV0pLzIpDQpjbGFzc2k9Y3V0KGludF9jb3JyLGJyZWFrcz14LGluY2x1ZGUubG93ZXN0PVRSVUUscmlnaHQ9RkFMU0UpDQoNCnk9dGFwcGx5KGRlYyxjbGFzc2ksbWVhbikNCnkNCg0KZmlnPWdncGxvdCgpK2dlb21fcG9pbnQoYWVzKGludF9jb3JyLGRlYykpK2dlb21fcG9pbnQoYWVzKG1pZCx5LGNvbG9yPSdyZWQnKSkNCmZpZw0KZGV0YWNoKGRhdGEpDQpgYGANCg0KQ29uY2x1ZGlhbW8gb3NzZXJ2YW5kbyBsJ2FuZGFtZW50byBkZWxsYSBkZWNpc2lvbmUgaW4gYmFzZSBhbGwnZXTDoCBkZWkgcHJhdGVjaXBhbnRpIGUgYWxsYSBkaWZmZXJlbnphIGRpIGV0w6AgdHJhIHBhcnRlY2lwYW50aSBlIHBhcnRuZXI6DQoNCmBgYHtyfQ0KYXR0YWNoKGRhdGEpDQoNCng9c2VxKG1pbihhZ2UpLG1heChhZ2UpLDIpDQptaWQ9YygoeFsyOmxlbmd0aCh4KV0reFsxOihsZW5ndGgoeCktMSldKS8yKQ0KY2xhc3NpPWN1dChhZ2UsYnJlYWtzPXgsaW5jbHVkZS5sb3dlc3Q9VFJVRSxyaWdodD1GQUxTRSkNCg0KeT10YXBwbHkoZGVjLGNsYXNzaSxtZWFuKQ0KeQ0KDQpmaWc9Z2dwbG90KCkrZ2VvbV9jb3VudChhZXMoYWdlLGRlYykpK2dlb21fcG9pbnQoYWVzKG1pZCx5LGNvbG9yPSdyZWQnKSkNCmZpZw0KZGV0YWNoKGRhdGEpDQoNCmBgYA0KDQpgYGB7cn0NCmF0dGFjaChkYXRhKQ0KDQp4PXNlcShtaW4oZF9hZ2UpLG1heChkX2FnZSksKQ0KbWlkPWMoKHhbMjpsZW5ndGgoeCldK3hbMToobGVuZ3RoKHgpLTEpXSkvMikNCmNsYXNzaT1jdXQoZF9hZ2UsYnJlYWtzPXgsaW5jbHVkZS5sb3dlc3Q9VFJVRSxyaWdodD1GQUxTRSkNCg0KeT10YXBwbHkoZGVjLGNsYXNzaSxtZWFuKQ0KeQ0KDQpmaWc9Z2dwbG90KCkrZ2VvbV9wb2ludChhZXMobWlkLHksY29sb3I9J3JlZCcpKStnZW9tX2NvdW50KGFlcyhkX2FnZSxkZWMpKStsYWJzKHg9ImRfYWdlIix5PSJkZWMiKQ0KZmlnDQpkZXRhY2goZGF0YSkNCmBgYA0KDQpDb3JyZWxhemlvbmUgdHJhIGxlIHZhcmllIGZlYXR1cmVzOg0KDQpgYGB7cn0NClJkYXRhPXN1YnNldChkYXRhLHNlbGVjdD1jKGF0dHIscHJvYixpbnRlbCxzaW5jLGFnZSxmdW4sYW1iLGxpa2UsaW50X2NvcnIsZF9hZ2UpKQ0KI3Zpc3VhbGl6emlhbW8gbGEgY29ycmVsYXppb25lIHRyYSBsZSB2YXJpYWJpbGkgcHJlc2VudGkgbmVsIGRhdGFzZXQNCg0KaGVhdG1hcGx5X2Nvcihjb3IoUmRhdGEpLHhsYWIgPSAiRmVhdHVyZXMiLHlsYWIgPSAiRmVhdHVyZXMiLGtfY29sID0gMixrX3JvdyA9IDIsY29sb3JzID0gbWFnbWEoMTAwMCksY2VsbG5vdGU9Y29yKFJkYXRhKSxjZWxsbm90ZV90ZXh0cG9zaXRpb24gPSAibWlkZGxlIGNlbnRlciIsY2VsbG5vdGVfc2l6ZSA9IDEwKQ0KDQpgYGANCg0KIyMgMykgQ09TVFJVWklPTkUgREVMIE1PREVMTE8NCg0KUHJpbWEgZGkgaW5pemlhcmUgYSBjb3N0cnVpcmUgaWwgbW9kZWxsbyBkaXZpZGlhbW8gaWwgZGF0YXNldCBpbiB0cmFpbmluZyBzZXQgZSB0ZXN0IHNldCBwZXIgbGEgY3Jvc3MtdmFsaWRhemlvbmU6IGxhdm9yZXJlbW8gc3VsIHRyYWluaW5nIGRhdGFzZXQNCg0KYGBge3J9DQpzbXBfc2l6ZSA8LSBmbG9vcigwLjggKiBucm93KGRhdGEpKQ0KDQojIyBzZXQgdGhlIHNlZWQgdG8gbWFrZSB5b3VyIHBhcnRpdGlvbiByZXByb2R1Y2libGUNCnRyYWluX2luZCA8LSBzYW1wbGUoc2VxX2xlbihucm93KGRhdGEpKSwgc2l6ZSA9IHNtcF9zaXplKQ0KDQp0cmFpbiA8LSBkYXRhW3RyYWluX2luZCwgXQ0KdGVzdCA8LSBkYXRhWy10cmFpbl9pbmQsIF0NCmBgYA0KDQpGaXR0aWFtbyB1biBtb2RlbGxvIGRpIHJlZ3Jlc3Npb25lIGxvZ2lzdGljYSBwZXIgdmVkZXJlIHF1YWxpIHZhcmlhYmlsaSBzb25vIHNpZ25pZmljYXRpdmUgcGVyIHByZWRpcmUgbGEgZGVjaXNpb25lIGRpIHVuIGluZGl2aWR1bzoNCg0KYGBge3J9DQphdHRhY2godHJhaW4pDQoNCm1vZDA9Z2xtKGRlY35hdHRyK3NpbmMraW50ZWwrZnVuK2FtYitsaWtlK3NhbWVyYWNlK2ludF9jb3JyK3Byb2IrcmFjZStnZW5kZXIrZF9hZ2UrbWV0K2RhdGUrZ29hbCtnb19vdXQsZmFtaWx5PWJpbm9taWFsKGxpbms9ImxvZ2l0IiksdHJhaW4pDQpzdW1tYXJ5KG1vZDApDQoNCg0KDQoNCm1vZGY9c3RlcChtb2QwLGRpcmVjdGlvbj0iYm90aCIsc2NvcGU9fmF0dHIrc2luYytpbnRlbCtmdW4rYW1iK2xpa2Urc2FtZXJhY2UraW50X2NvcnIrcHJvYityYWNlK2RhdGUrZ2VuZGVyK2RfYWdlK21ldCtnb2FsK2dvX291dCApDQpzdW1tYXJ5KG1vZGYpDQoNCnZpZihtb2RmKQ0KDQpkZXRhY2godHJhaW4pDQpgYGANCg0KQ29zdHJ1aWFtbyBpbCBjbGFzc2lmaWNhdG9yZToNCg0KYGBge3J9DQpzb2dsaWE9MC41DQp2YWxvcmlfcmVhbGk9dHJhaW4kZGVjICAgIA0KdmFsb3JpX3ByZWRldHRpPWFzLm51bWVyaWMobW9kZiRmaXR0ZWQudmFsdWVzPnNvZ2xpYSkNCg0KdGFiPXRhYmxlKHZhbG9yaV9yZWFsaSx2YWxvcmlfcHJlZGV0dGkpDQp0YWINCg0KYWNjdXJhdGV6emEgPSBzdW0oZGlhZyh0YWIpKS9zdW0odGFiKQ0KYWNjdXJhdGV6emENCg0Kc3BlY2lmaWNpdGEgPSB0YWJbMSwxXS8odGFiWzEsMV0rdGFiWzEsMl0pDQpzcGVjaWZpY2l0YQ0KRlBSPTEtc3BlY2lmaWNpdGEgDQoNCg0Kc2Vuc2l0aXZpdGEgPSB0YWJbMiwyXS8odGFiWzIsMV0rdGFiWzIsMl0pDQpzZW5zaXRpdml0YQ0KDQpmaXQ9bW9kZiRmaXR0ZWQNClBSUk9DX29iaiA8LSByb2MuY3VydmUoc2NvcmVzLmNsYXNzMCA9IGZpdCwgd2VpZ2h0cy5jbGFzczA9YXMubnVtZXJpYyhwYXN0ZSh0cmFpbiRkZWMpKSwNCiAgICAgICAgICAgICAgICAgICAgICAgY3VydmU9VFJVRSkNCg0KDQpwbG90KFBSUk9DX29iaikNCnBvaW50cyhGUFIsc2Vuc2l0aXZpdGEscGNoPTQsbHdkPTMsY2V4PTEuNSxjb2w9J2JsdWUnKQ0KDQojdHJvdmFyZSBsYSBzb2dsaWEgb3R0aW1hOiBzZW1icmVyZWJiZSBlc3NlcmUgMC4zNDANCm15Y3VydmUgPSByb2ModHJhaW4kZGVjLG1vZGYkZml0dGVkLnZhbHVlcykNCnBsb3QobXljdXJ2ZSxwcmludC50aHJlcz1UUlVFKQ0KDQojcmljYWxjb2xvIGxlIHRhYmVsbGUgZGkgbWlzY2xhc3NpZmljYXppb25lDQpzb2dsaWE9MC4zNDANCnZhbG9yaV9yZWFsaT10cmFpbiRkZWMgICAgDQp2YWxvcmlfcHJlZGV0dGk9YXMubnVtZXJpYyhtb2RmJGZpdHRlZC52YWx1ZXM+c29nbGlhKQ0KDQp0YWI9dGFibGUodmFsb3JpX3JlYWxpLHZhbG9yaV9wcmVkZXR0aSkNCnRhYg0KDQphY2N1cmF0ZXp6YSA9IHN1bShkaWFnKHRhYikpL3N1bSh0YWIpDQphY2N1cmF0ZXp6YQ0KDQpzcGVjaWZpY2l0YSA9IHRhYlsxLDFdLyh0YWJbMSwxXSt0YWJbMSwyXSkNCnNwZWNpZmljaXRhDQpGUFI9MS1zcGVjaWZpY2l0YSANCg0KDQpzZW5zaXRpdml0YSA9IHRhYlsyLDJdLyh0YWJbMiwxXSt0YWJbMiwyXSkNCnNlbnNpdGl2aXRhDQoNCg0KYGBgDQoNCiMjIDQpIERJQUdOT1NUSUNBIEUgR09GDQoNCkNoZWNrIGNvbGxpbmVhcml0w6A6DQoNCmBgYHtyfQ0KdmlmKG1vZGYpDQpgYGANCg0KVmVyaWZpY2hpYW1vIGNoZSBpbCBtb2RlbGxvIHJpZG90dG8gbm9uIHNpYSBtZW5vIGluZm9ybWF0aXZvIGRlbCBtb2RlbGxvIGNvbXBsZXRvIGluaXppYWxlIGNvbiB1biB0ZXN0IGFub3ZhOg0KDQpgYGB7cn0NCmFub3ZhKG1vZGYsbW9kMCx0ZXN0PSJDaGlzcSIpDQpgYGANCg0KVGVzdCBkaSBIb3NtZXIgZSBMZW1lc2hvdyBwZXIgdmFsdXRhcmUgR09GIG1vZGVsbG86DQoNCmBgYHtyfQ0KaG9zbGVtLnRlc3QobW9kZiR5LGZpdHRlZChtb2RmKSxnPTEwKQ0KDQpkaW0obW9kZWwubWF0cml4KG1vZGYpKQ0KYGBgDQoNCk9kZHMgcmF0aW8gcGVyIGludGVycHJldGFyZSBpIGNvZWZmaWNpZW50aToNCg0KYGBge3J9DQpsaWJyYXJ5KE9kZHNQbG90dHkpDQpwbG90dHk9b2Rkc19wbG90KG1vZGYpDQpwbG90dHkkb2Rkc19wbG90DQoNCmBgYA0KDQpUZXN0aWFtbyBpbCBtb2RlbGxvIGZpdHRhdG8gc3VsIHRlc3Qgc2V0Og0KDQpgYGB7cn0NCnByZWRUZXN0IDwtIHByZWRpY3QobW9kZiwgdGVzdCwgdHlwZT0icmVzcG9uc2UiKQ0KDQoNCnNvZ2xpYT0wLjM0MCAgIyB0aHJlc2hvbGQgZm9yIGNhdGVnb3JpemluZyBwcmVkaWN0ZWQgcHJvYmFiaWxpdGllcw0KcHJlZEZhYyA8LSBjdXQocHJlZFRlc3QsIGJyZWFrcz1jKC1JbmYsIHNvZ2xpYSwgSW5mKSwgbGFiZWxzPWMoJzAnLCAnMScpKQ0KDQpUYWIgICAgPC0gdGFibGUodGVzdCRkZWMsIHByZWRGYWMsIGRubj1jKCJhY3R1YWwiLCAicHJlZGljdGVkIikpDQpUYWINCg0KYWNjdXJhdGV6emEgPSBzdW0oZGlhZyhUYWIpKS9zdW0oVGFiKQ0KYWNjdXJhdGV6emENCg0Kc3BlY2lmaWNpdGEgPSBUYWJbMSwxXS8oVGFiWzEsMV0rVGFiWzEsMl0pDQpzcGVjaWZpY2l0YQ0KRlBSPTEtc3BlY2lmaWNpdGEgDQoNCg0Kc2Vuc2l0aXZpdGEgPSBUYWJbMiwyXS8oVGFiWzIsMV0rVGFiWzIsMl0pDQpzZW5zaXRpdml0YQ0KDQpmaXQ9bW9kZiRmaXR0ZWQNClBSUk9DX29iaiA8LSByb2MuY3VydmUoc2NvcmVzLmNsYXNzMCA9IGZpdCwgd2VpZ2h0cy5jbGFzczA9YXMubnVtZXJpYyhwYXN0ZSh0cmFpbiRkZWMpKSwNCiAgICAgICAgICAgICAgICAgICAgICAgY3VydmU9VFJVRSkNCg0KDQpwbG90KFBSUk9DX29iaikNCmBgYA0KDQojIyANCg==